home *** CD-ROM | disk | FTP | other *** search
- (require 'cl)
- (require 'cl-19)
-
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;;
- ;;; PARCIL - A Parser for C syntax In Lisp
- ;;; version 0.1a
- ;;;
- ;;; copyright (c) 1992 by Erann Gat, all rights reserved
- ;;;
- ;;; Ported to elisp by Harvey Stein <abel@netvision.net.il>, and
- ;;; eventually <hjstein@netvision.net.il>.
- ;;; Modified in various minor ways for handling Fortran.
- ;;; Added function unparcil - the inverse of parcil.
- ;;;
- ;;; This program is free software; you can redistribute it and/or modify
- ;;; it under the terms of the GNU General Public License as published by
- ;;; the Free Software Foundation.
- ;;;
- ;;; This program is distributed in the hope that it will be useful,
- ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
- ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- ;;; GNU General Public License for more details.
- ;;;
- ;;; You should have received a copy of the GNU General Public License
- ;;; along with this program; if not, write to the Free Software
- ;;; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- ;;;
- ;;;
- ;;; This is a very preliminary release and almost certainly contains bugs.
- ;;; Please send bug reports and comments to:
- ;;; Erann Gat
- ;;; JPL MS 525-3660
- ;;; 4800 Oak Grove Drive
- ;;; Pasadena, CA 91109
- ;;; (818) 306-6176
- ;;; gat@robotics.jpl.nasa.gov or gat@aig.jpl.nasa.gov
- ;;;
- ;;; Revision history:
- ;;; v0.1a - Initial release
- ;;;
-
- ;;; PARCIL is a parser for a subset of the syntax for the C programming
- ;;; language. PARCIL is written in Common Lisp, making it potentially
- ;;; a useful building block for user interfaces for people who do not
- ;;; like prefix syntax.
- ;;;
- ;;; PARCIL is a recursive descent parser optimized to parse C. This makes it
- ;;; fairly brittle and difficult to modify. However, it does make it fairly
- ;;; fast, and it also allows the parser to deal with lots of C idiosyncrasies
- ;;; which are difficult to implement in general-purpose parsers, e.g. operator
- ;;; precedence, prefix and postfix operators, etc.
- ;;;
- ;;; NOTE: While PARCIL is designed to be a component in user interfaces for
- ;;; people who are not regular LISP users, it is probably not usable for that
- ;;; purpose as-is. There are two major problems with it. First, it is incomplete.
- ;;; It currently includes no support for any high-level C construct (i.e. it
- ;;; implements the syntax described in the original Kernighan and Richie book,
- ;;; section 18.1). The second problem is that PARCIL is so faithful to C syntax
- ;;; that it can easily fool the unwary into believing that they are writing C code
- ;;; when in fact they are writing LISP code, only with a different syntax. You
- ;;; need a fairly deep understanding of the distinction between syntax and
- ;;; semantics in order to use PARCIL. The main stumbling block to its use by
- ;;; beginners is that PARCIL does very little error checking. Thus, many errors
- ;;; which should be detected by PARCIL are passed on and caught by LISP. The
- ;;; resulting error messages can be very cryptic if you don't know what's going
- ;;; on.
- ;;;
- ;;; PHILOSOPHICAL RANT: Infix notation is a blight on the intellectual landscape.
- ;;; It is confusing to read, difficult to parse, and to avoid ambiguity must rely
- ;;; on precedence rules that are hopelessly obscure. People who prefer infix
- ;;; notation do so only because they have been indoctrinated to it since
- ;;; childhood and do not have the intellectual strength to break free. It is
- ;;; far better to convince people to use prefix notation, with its easy to read
- ;;; and easy to parse, unambiguous syntax, than to provide them with crutches
- ;;; such as PARCIL which perpetuate such evils as infix, prefix and postfix unary
- ;;; operators. (In C, "x++*++****y" is a legal expression, and the first * doesn't
- ;;; mean the same thing as all the other *'s.) Nevertheless, I acknowledge the
- ;;; reality that infix and C are here to stay, and that is why I have written
- ;;; PARCIL. But that doesn't mean I have to like it.
- ;;;
- ;;; USER'S GUIDE:
- ;;;
- ;;; The top-level function is called PARCIL. Pass a string consisting of a C
- ;;; expression (not a command!) to PARCIL and it will return a parsed version.
- ;;; For example:
- ;;;
- ;;; (parcil "x=y*sin(pi/2.7)") ==> (SETF X (* Y (SIN (/ PI 2.7))))
- ;;;
- ;;; PARCIL supports all syntax defined in section 18.1 of the original Kernighan
- ;;; and Ritchie book, plus all C numerical syntax including floats and radix
- ;;; syntax (i.e. 0xnnn, 0bnnn, and 0onnn). In addition, PARCIL supports multiple
- ;;; array subscripts. There is also a preliminary version of {} blocks, but it
- ;;; doesn't quite do the right thing. Parcil also allows strings to be delimited
- ;;; using single quotes as well as double quotes (but you must use the same type
- ;;; to close the string as you did to open it).
-
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;;
- ;;; Program starts here
- ;;;
-
- ;;; Misc. utilities
- ;;;
- ;;;(defmacro iterate (name args &rest body)
- ;;; `(labels ((,name ,(mapcar #'car args) ,@body))
- ;;; (,name ,@(mapcar #'cadr args))))
-
- (defmacro parcil:iterate (name args &rest body)
- (` (labels (((, name) (,(mapcar 'car args)) (,@ body)))
- ( (, name) (,@ (mapcar 'cadr args))))))
-
-
- ;;(defmacro while (condition &body body)
- ;; `(parcil:iterate loop () (if ,condition (progn ,@body (loop)))))
-
- ;;; Crufty pseudo-text-file interface. Don't let impressionable young minds
- ;;; see this code.
- ;;;
- (defvar *the-string* "")
- (defvar *the-pointer* 0)
-
- (defun parcil:parse-init (s)
- (setq *the-string* s)
- (setq *the-pointer* 0))
-
- (defun parcil:eof (&optional offset)
- (if (null offset) (setq offset 0))
- (>= (+ *the-pointer* offset) (length *the-string*)))
-
- (defun parcil:peek (&optional offset)
- (if (null offset) (setq offset 0))
- (if (parcil:eof offset)
- nil
- (elt *the-string* (+ *the-pointer* offset))))
-
- (defun parcil:readc ()
- (prog1 (parcil:peek) (incf *the-pointer*)))
-
- ;;; The PARCIL tokenizer. (FSA? What's an FSA?)
- ;;;
- (defun parcil:letter (c)
- (and (numberp c)
- (or (and (<= ?a c) (<= c ?z))
- (and (<= ?A c) (<= c ?Z)))))
-
- (defun parcil:digit (c)
- (and (numberp c) (<= ?0 c) (<= c ?9)))
-
- (defun parcil:ident (thing)
- (and thing
- (symbolp thing)
- (parcil:letter (elt (symbol-name thing) 0))))
-
- (defvar *binary-ops*
- '((\. ->) (* / %) (+ -) (<< >>) (< > <= >=) (== !=) (&) (^) (\|) (&&) (\|\|) (:)
- (= += -= *= /= %= &= ^= \|= >>= <<=)))
-
- ;;; Any binary operator in this alist will be renamed in the parsed version.
- (defvar *binop-alist*
- '((\. . struct-ref) (= . setf) (% . mod) (<< . ashl) (>> . ashr)
- (& . logand) (^ . logxor) (\| . logior) (&& . and) (\|\| . or)))
-
- (defvar *binop-inv-alist*
- (mapcar (lambda (x) (cons (cdr x) (car x)))
- *binop-alist*))
-
- (defun parcil:binop (s)
- ;; Was:
- ;; (member s *binary-ops* :test #'member)
- (let ((l *binary-ops*)
- (found ()))
- (while (and l (not found))
- (if (member s (car l))
- (setq found t)
- (setq l (cdr l))))
- l))
-
- (defun parcil:assignop (s)
- (member s (car (last *binary-ops*))))
-
-
- (defun parcil:priority (s)
- ;; (let ( (p (position s *binary-ops* :test #'member)) )
- ;; (and p (- 40 p)))
- (let ((p (parcil:binop s)))
- (and p (- 40 (- (length *binary-ops*)
- (length p))))))
-
-
- (defun parcil:translate-binop (op) (or (cdr (assoc op *binop-alist*)) op))
-
- (defun parcil:eat-spaces ()
- (do () ( (not (eql (parcil:peek) ? )) )
- (parcil:readc)))
-
- (defun parcil:syntax-error ()
- (error "Syntax error near %s" (substring *the-string* (max 0 (1- *the-pointer*)))))
-
- (defun parcil:parse-fixnum (&optional base)
- (if (null base) (setq base 10))
- (let ((cnt (string-match "[+-]?[0-9]+" *the-string* *the-pointer*)))
- (if (null cnt)
- (parcil:syntax-error))
- (setq cnt (match-end 0))
- (prog1
- (string-to-int (substring *the-string* *the-pointer* cnt))
- (setq *the-pointer* cnt))))
-
- ;;; The following is an attempt to get parcil to properly parse ".7".
- ;;; Unfortunately, this seems to be a losing battle, because it seems
- ;;; to have already snarfed up the . before starting...
- (defun parcil:parse-fixnum-hack (&optional base)
- (if (null base) (setq base 10))
- (let ((cnt (string-match "^[+-]?[0-9]+\\.?[0-9]*" *the-string* *the-pointer*)))
- (when (null cnt)
- (setq cnt (string-match "^[+-]?\\.[0-9]+" *the-string* *the-pointer*))
- (if (null cnt)
- (parcil:syntax-error)))
- (setq cnt (match-end 0))
- (prog1
- (string-to-int (substring *the-string* *the-pointer* cnt))
- (setq *the-pointer* cnt))))
-
- (defun parcil:parse-atom ()
- (parcil:eat-spaces)
- (if (parcil:eof)
- nil
- (let ( (c (parcil:peek)) )
- (cond ( (parcil:letter c) (parcil:parse-symbol) )
- ( (eql c ?0) (if (parcil:letter (parcil:peek 1))
- (parcil:parse-radix-integer)
- (parcil:parse-number)) )
- ( (parcil:digit c) (parcil:parse-number) )
- ( (or (eql c ?") (eql c ?')) (parcil:parse-string c) ) ;; Stick a " here to fool emacs hilight package.
- (t (parcil:parse-operator))))))
-
- (defun parcil:parse-symbol ()
- (intern
- (downcase
- (let ((s ""))
- (while (let ( (c (parcil:peek)) ) (and c (or (parcil:letter c) (parcil:digit c) (eql c ?_))))
- (setq s (concat s (char-to-string (parcil:readc)))))
- s))))
-
- (defun parcil:parse-radix-integer ()
- (parcil:readc)
- (parcil:parse-fixnum (let ((c (parcil:readc)))
- (cond ((= c ?x) 16)
- ((= c ?o) 8)
- ((= c ?b) 2)))))
-
- (defun parcil:parse-number ()
- (let* ( (n1 (parcil:parse-fixnum))
- (c (parcil:peek))
- (d 0.1))
- (cond ((eql c ?.)
- (parcil:decimal d n1 c))
- ((or (eql c ?e) (eql c ?E) (eql c ?d) (eql c ?D))
- (parcil:expt d n1 c))
- (t n1))))
-
- (defun parcil:decimal (d n1 c)
- (parcil:readc)
- (let ( (c (parcil:peek)) )
- (cond ((parcil:digit c)
- (incf n1 (* d (- c ?0)))
- (setq d (/ d 10))
- (parcil:decimal d n1 c))
- ((or (eql c ?e) (eql c ?E) (eql c ?d) (eql c ?D))
- (parcil:expt d n1 c))
- (t
- n1))))
-
- (defun parcil:expt (d n1 c)
- (parcil:readc)
- (let ( (e (parcil:parse-fixnum)) )
- (* n1 (expt 10 e))))
-
- (defun parcil:parse-string (terminator)
- (parcil:readc)
- (let ((s ""))
- (parcil:iterate parcil:loop ()
- (let ( (c (parcil:readc)) )
- (when (eql c terminator) (return-from parcil:loop s))
- (setq s (concat s c))
- (parcil:loop)))))
-
- ;;(defun parcil:parse-string (terminator)
- ;; (parcil:readc)
- ;; (parcil:parse-string-aux terminator "" (parcil:readc)))
-
- ;;(defun parcil:parse-string-aux (terminator s c)
- ;; (cond ((eql c terminator)
- ;; s)
- ;; (t (parcil:parse-string-aux terminator (concat s c) (parcil:readc)))))
-
- (defun parcil:parse-operator ()
- (let* ( (c (intern (char-to-string (parcil:readc))))
- (p (parcil:peek))
- (s (intern (format "%s%s" c (if p (char-to-string p) "")))))
- (cond ( (member s '(<< >>))
- (parcil:readc)
- (if (eql (parcil:peek) ?=)
- (intern (format "%s%c" s (parcil:readc)))
- s) )
- ( (member s '(++ -- << >> -> <= >= != == &&
- += -= *= /= %= &= ^= \|= \|\|))
- (parcil:readc)
- s )
- (t c))))
-
- ;;; Crufty interface to the tokenizer.
- ;;;
- (defvar *next*)
-
- (defun parcil:scan ()
- (setq *next* (parcil:parse-atom)))
-
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;;
- ;;; The recursive-descent parser. Look Ma, no tables!
- ;;;
- (defun* parcil:parse-expression (&optional (priority -1))
- (parcil:iterate parcil:loop ( (result (parcil:parse-term)) )
- (let ( (op (parcil:translate-binop *next*))
- (new-priority (parcil:priority *next*)) )
- (cond
- ( (parcil:assignop *next*) (parcil:scan) (list op result (parcil:loop (parcil:parse-term))) )
- ( (and (parcil:binop *next*) (> new-priority priority))
- (parcil:scan) (parcil:loop (list op result (parcil:parse-expression new-priority))) )
- (t result)))))
-
- (defun* parcil:parse-arglist (&optional (terminator '\)) (separator '\,))
- (parcil:iterate parcil:loop ()
- (cond ( (null *next*) (error "Missing ~S" terminator) )
- ( (eq *next* terminator) (parcil:scan) nil )
- (t (let ( (arg1 (parcil:parse-expression)) )
- (unless (or (eq *next* separator) (eq *next* terminator))
- (parcil:syntax-error))
- (if (eq *next* separator) (parcil:scan))
- (cons arg1 (parcil:loop)))))))
-
- ;;; Any prefix unary operator included in this table will be renamed in the parsed
- ;;; version. (Postfix ++ and -- are handled specially, in PARCIL:PARSE-TERM.)
- (defvar *unary-op-alist*
- '((* . deref) (& . address-of)
- (- . -) (! . not) (~ . lognot)
- (++ . incf) (-- . decf)))
-
- (defvar *unary-op-inv-alist*
- (mapcar (lambda (x) (cons (cdr x) (car x)))
- *unary-op-alist*))
-
- ;;; This function parses what K&R call primary expressions. These include numbers,
- ;;; variables, structure references, array references, and all unary operators.
- ;;; Parsing of curly brackets is also stuck in here, though it probably shouldn't be.
- ;;; The weird precedence rules make this a fairly hariy and brittle piece of code.
- ;;;
- (defun parcil:parse-term ()
- (parcil:iterate parcil:loop ( (term (prog1 *next* (parcil:scan))) )
- ;; (insert (format "Next term is: %s\n" *next*))
- (cond
- ( (numberp term) term )
- ( (assoc term *unary-op-alist*)
- (list (cdr (assoc term *unary-op-alist*)) (parcil:parse-term)) )
- ( (eq term '\( )
- (cons 'progn (parcil:parse-arglist)) )
- ( (eq term '{)
- (list* 'let '() (parcil:parse-arglist '} '\;)) )
- ( (eq *next* '\( )
- (parcil:scan)
- (parcil:loop (cons term (parcil:parse-arglist))) )
- ( (eq *next* '\[ )
- (parcil:scan)
- (parcil:loop (` (aref (, term) (,@ (parcil:parse-arglist '\]))))) )
- ( (eq *next* '\.)
- (parcil:loop (` (struct-ref (, term) (, (prog1 (parcil:scan) (parcil:scan)))))) )
- ( (eq *next* '->)
- (parcil:loop (` (-> (, term) (, (prog1 (parcil:scan) (parcil:scan)))))) )
- ( (eq *next* '++)
- (parcil:scan)
- (parcil:loop (` (prog1 (, term) (incf (, term))))) )
- ( (eq *next* '--)
- (parcil:scan)
- (parcil:loop (` (prog1 (, term) (decf (, term))))) )
- (t
- (if (and (atom term) (not (parcil:ident term)))
- (parcil:syntax-error))
- term))))
-
- ;;; Useful for unparsing...
- (defun parcil:separate-with (s l)
- (cond ((null l) "")
- ((null (cdr l)) (format "%s" (car l)))
- (t (concat (format "%s%s" (car l) s)
- (parcil:separate-with s (cdr l))))))
-
- ;;;;;;;;;;;;;;;;;
- ;;;
- ;;; The top level
- ;;;
- (defun parcil (s)
- (parcil:parse-init s)
- (parcil:scan)
- (prog1
- (parcil:parse-expression)
- (if *next* (parcil:syntax-error)))) ; If there's stuff left over something went wrong.
-
-
- (defun unparcil (s)
- (cond ((atom s)
- (format "%s" s))
- ((eq (car s) 'progn)
- (if (null (cddr s))
- (unparcil (cadr s))
- (format "(%s)"
- (parcil:separate-with "," (mapcar 'unparcil (cdr s))))))
- ((eq (car s) 'prog1)
- (cond ((eq (car (nth 2 s)) 'incf)
- (format "(%s++)" (unparcil (nth 1 s))))
- ((eq (car (nth 2 s)) 'decf)
- (format "(%s--)" (unparcil (nth 1 s))))
- (t (error "found prog1 with neither incf nor decf"))))
- ((eq (car s) 'aref)
- (format "(%s[%s])"
- (unparcil (nth 1 s))
- (parcil:separate-with "," (mapcar 'unparcil (cddr s)))))
- ((and (assoc (car s) *unary-op-inv-alist*)
- (null (cddr s)))
- (format "(%s %s)" (cdr (assoc (car s) *unary-op-inv-alist*))
- (unparcil (cadr s))))
- ((assoc (car s) *binop-inv-alist*)
- (format "(%s %s %s)"
- (unparcil (nth 1 s))
- (cdr (assoc (car s) *binop-inv-alist*))
- (unparcil (nth 2 s))))
- ((parcil:binop (car s))
- (format "(%s %s %s)"
- (unparcil (nth 1 s))
- (car s)
- (unparcil (nth 2 s))))
- (t
- (format "(%s(%s))"
- (unparcil (nth 0 s))
- (parcil:separate-with ","
- (mapcar 'unparcil (cdr s)))))))
-
- (provide 'parcil)
-